* This file creates the coordinates by stratum that are used in the kriging interpolation
* Instructions on how to use geographical coordinates in stata are taken from https://www.stata.com/support/faqs/graphics/spmap-and-maps/

global name "s1_maps"
global main "C:/Users/silvio/Documents/CVR/ryp"
global output "$main/output"  
global data "${main}/data"  
global odata "${data}/Intermuestra1-2"  

global code "${main}/code"  
global coord "$data/coordinates"  
global geocoord "$data/geocoord"  

cd $main
capture log close
log using "${output}/${name}.log", replace
clear


* Step 1: Obtain and install the spmap, shp2dta, and mif2dta commands

/* If already installed, skip these commmands!
ssc install spmap
ssc install shp2dta
ssc install mif2dta
*/

/* Step 2: Find a map (an ESRI shapefile or a MapInfo Interchange Format file)
For Peru these files are obtained from
http://www.geogpsperu.com/2014/03/base-de-datos-peru-shapefile-shp-minam.html
*/

*Step 3: Translate the files
shp2dta using "${coord}/BAS_LIM_DISTRITOS", database("${geocoord}/peru_dist") coordinates("${geocoord}/peru_dist_coor") genid(id_di) 
shp2dta using "${coord}/BAS_LIM_PROVINCIA", database("${geocoord}/peru_prov") coordinates("${geocoord}/peru_prov_coor") genid(id_p) 
shp2dta using "${coord}/BAS_LIM_DEPARTAMENTO", database("${geocoord}/peru_dep") coordinates("${geocoord}/peru_dep_coor") genid(id_de) 


use "${geocoord}/peru_dist_coor", clear
sort _ID
by _ID: g x=sum(_X)
by _ID: replace x=x/_N
by _ID: g y=sum(_Y)
by _ID: replace y=y/_N
by _ID: keep if _n==_N
ren _ID id_di
ren x x_di
ren y y_di
sort id_di
drop _X _Y
save "${geocoord}/peru_di_acor", replace


use "${geocoord}/peru_prov_coor", clear
sort _ID
by _ID: g x=sum(_X)
by _ID: replace x=x/_N
by _ID: g y=sum(_Y)
by _ID: replace y=y/_N
by _ID: keep if _n==_N
ren _ID id_p
ren x x_p
ren y y_p
sort id_p
drop _X _Y
save "${geocoord}/peru_p_acor", replace

use "${geocoord}/peru_dep_coor", clear
sort _ID
by _ID: g x=sum(_X)
by _ID: replace x=x/_N
by _ID: g y=sum(_Y)
by _ID: replace y=y/_N
by _ID: keep if _n==_N
ren _ID id_de
ren x x_de
ren y y_de
sort id_de
drop _X _Y
save "${geocoord}/peru_de_acor", replace



*Step 4: Determine the coding used by the map
*use "C:/Users/silvio/Documents/CVR/stata_maps/Limite_distrital/peru_dist", clear
use "${geocoord}/peru_dist", clear
ren *, lower
destring iddist,g(lidist)
destring idprov,g(liprov)
destring iddpto,g(idepa)
sort liprov
save, replace

use "${geocoord}/peru_prov", clear
ren *, lower
destring first_idpr ,g(liprov)
sort liprov  first_fech
by liprov: keep if _n==_N
sort liprov
save, replace

use "${geocoord}/peru_dep", clear
ren *, lower
destring  first_iddp ,g(idepa)
sort idepa
save, replace

use "${geocoord}/peru_dist", clear
merge m:1 liprov using "${geocoord}/peru_prov"
drop _merge
sort idepa
merge m:1 idepa using "${geocoord}/peru_dep"
drop _merge

gen iprov=liprov-idepa*100
gen idist=int(lidist-idepa*10000-iprov*100)
g loddist=idepa*10000+iprov*100+idist

do "$code/strata58" /*Creates j that defines 58 strata of BASM*/

sort id_di
merge m:1 id_di using "${geocoord}/peru_di_acor"
drop _merge

sort id_p
merge m:1 id_p using "${geocoord}/peru_p_acor"
drop _merge

sort id_de
merge m:1 id_de using "${geocoord}/peru_de_acor"
drop _merge

sort j
save "${geocoord}/peru_all", replace



/*
Step 5: Merge datasets
*/

use "${data}/cvrextrapo", clear
g loddist=idepa*10000+iprov*100+idist
sort j loddist
by j loddist: g nobs=_N
by j loddist: keep if _n==_N
sort j nobs
by j: keep if _n==_N
drop if j==.
keep j loddist
sort j
merge 1:m j loddist using "${geocoord}/peru_all", keep(3)
keep j x_di y_di 
ren j i
sort i
save "${geocoord}/distxy", replace

****************************************************************************************
capture log close
clear
